#! env perl
# 
# Name       : XMLDiff.pl
# 
# Purpose    : To Compare two XML files and produce difference excel sheet.
#
# How to use : run program with "--help" option or with no options.
#
# Author     : V140760 
#
# Team       : Pyramid Packaging Team 
#
# Platform   : Windows specific.
#              *** If Tk module is removed it can be ported to Unix.
#
# History    :
#
# 04-May-09  : Initial version.
# 06-May-09  : - Removed Redundant code.
#              - Added extra debug information.
#              - Standardised output file name.
#              - Displays after processing information
# 15-May-09  : - Interpretation of XMLHash is changed and improved. Now more checks are performed
#                on the output message to determine if it is a change, delete or addition.
#              - Extra worksheet is added which shows the basic information which
#                was missing in the first version.
# 
#__END_OF_SCRIPT_INFORMATION__
 
 
  ## PERL MODULES USED
  
use strict ;  
use File::Basename;
use Data::Dumper ; 
use XML::SemanticDiff;
use Getopt::Long ;
use Spreadsheet::WriteExcel ;
use Tk ;
use Cwd ;

  ## VARIABLES USED IN SCRIPT
   
my ($oldfile,$newfile) ;
my $version   = "2.0" ;
my $debug     = "0" ;
my $test      = "0" ;
my $help      = "0" ;
my $xlsCnt    = "1" ; 
my $file      = "$$-XMLDiff.xls" ;
my $debugFile = "XMLDiff-debug.txt" ;
my $DiffCnt   = 0 ;
my $cwd       = cwd();

my @Changed = () ;
my @Added   = () ;
my @Deleted = () ;

my ($General,$AddFormat,$DelFormat,$Heading) ;
my ($La,$Lb,$Lc,$Ld,$Le,$Lf) ;

my $wb ;
my ($ws1,$ws2) ; 
   
# Function : Entry point. Runs the whole comparison, from argument parsing
#            to the final report.
# Input    : None (reads the command line through ValidateArgs).
# Output   : None. DisplayInfo ends the process, so control does not return.
sub main ()
{
      # Parse the command line; fills $oldfile and $newfile.
   &ValidateArgs() ;

      # Both inputs go through the same extension / readability screening.
   foreach my $xmlFile ( $oldfile, $newfile )
   {
      &IsValidXML($xmlFile) ;
   }

      # The workbook must exist before any difference row is written.
   &CreateExcelObj() ;

      # Collect the differences and write them to the workbook.
   &CompareXML($oldfile,$newfile) ;

      # Write the information sheet, close the workbook and exit.
   &DisplayInfo() ;
}
main() ;

# Function : Create the output workbook, its two worksheets, the cell
#            formats and the header row of the difference sheet.
# Input    : None (the workbook name is taken from $file).
# Output   : 0 - workbook is ready.
#            exit - if the workbook can not be created.
sub CreateExcelObj()
{
   $wb = Spreadsheet::WriteExcel->new($file);

      # new() returns undef when the file can not be created. Stop here with
      # a clear message instead of failing on the next method call.
   &Logthis("exit","Can not create Excel file [$file]. OS Error : $!") unless ( defined $wb ) ;

   $wb->set_properties (
                        title    => 'XML difference file',
                        author   => 'Ninad Mhatre',
                        comments => 'Created with Perl and Spreadsheet::WriteExcel'
                       );
   $ws1  = $wb->add_worksheet('XML difference');
   $ws2  = $wb->add_worksheet('Information');
   
      # One format per kind of row: added, deleted, header, changed.
   $AddFormat = $wb->add_format( align => 'left' , color => 'green' );
   $DelFormat = $wb->add_format( align => 'left' , color => 'red');
   $Heading   = $wb->add_format( align => 'center', bold => 1);
   $General   = $wb->add_format( align => 'left' , color => 'blue');
   
   my @Header = ('Line No','Action', 'Changed Node','Parent Node','Old Value','New Value') ;

      # Starting widths of columns A..F; getColumnLength widens them later.
   $La = "9" ;$Lb = "10" ;$Lc = "16" ;$Ld = "16" ;$Le = "12" ;$Lf = "12" ;
   
   $ws1->write('A1',\@Header,$Heading);

   return 0 ; 
}

# Function : Show the help text in a Tk message box.
# Input    : None.
# Output   : Never returns; the script exits with status 0.
sub Usage()
{
   my $scriptName = basename($0) ;

      # The option names shown here must match the ones given to GetOptions
      # (-oldfile / -newfile).
   my $HELP = <<"EOH" ;
     
   $scriptName is command line utility. 
   
   Usage : $scriptName -oldfile=<file> -newfile=<file> -debug -help
   E.g      : $scriptName -oldfile=old.xml -newfile=new.xml 
    
   Desc   : -oldfile   => XML file which will be used as reference.
                -newfile => XML file which will be used for comparison.
                -debug  => Optional. Print debug information.
                -help      => Print this information.
   
Version : $version
            
EOH

      # Only the message box is wanted, so the main window stays hidden.
   my $mw = MainWindow->new ;
   $mw->withdraw();
   
   $mw->messageBox(
                   -title   =>"Show Help Message", 
                   -message => $HELP,
                   -icon    => 'error',
                   -type    => 'ok') ;
   exit 0 ;
}

# Function : Parse the command line and open the debug file when asked for.
# Input    : None (GetOptions reads the command line).
# Output   : 0 - arguments are fine; $oldfile and $newfile are set.
#            Shows the help text and exits when an option is unknown,
#            -help is given or one of the two file names is missing.
#            exit - if the debug file can not be opened.
sub ValidateArgs()
{
   my $Result = GetOptions ("oldfile=s" => \$oldfile,
                            "newfile=s" => \$newfile,
                            "debug"     => \$debug,
                            "test"      => \$test,
                            "help"      => \$help);

      # GetOptions returns false when it met an unknown or malformed option.
   &Usage unless ( $Result ) ;
   &Usage if ( $help ) ;

      # Both file names are mandatory.
   if ( ! defined $oldfile || ! defined $newfile || $oldfile eq "" || $newfile eq "" )
   {
      &Usage ;
   }

      # DEBUG stays a bareword handle because Logthis prints to it by name.
   if ( $debug )
   {
      open(DEBUG,'>',$debugFile) or &Logthis("exit","Can not open file. OS Error : $!") ;
   }

      # Logging only works from here on: before this point $debug is not set
      # and the debug file is not open.
   &Logthis("info","*** Start of Debug file ***") ;
   &Logthis("info","Validating Command line arguments") ;
   &Logthis("info","Command Line arguments verified. All Ok.") ;
   return 0 ; 
}

# Function : To check if file is present, readable and has an XML extension.
# Input    : filename
# Output   : 0 - if XML file
#            exit - if file is missing, not readable or not an XML file
sub IsValidXML($)
{
   my $_File = $_[0] ;

   &Logthis("info","Checking if [$_File] is valid XML file.") ;
   &Logthis("info","     Checking if $_File is present and readable") ;

      # Logthis does not return for the "exit" level, so these act as guards.
   &Logthis("exit","$_File file not found.Exiting...")    unless ( -e $_File ) ;
   &Logthis("exit","$_File file not readable.Exiting...") unless ( -r $_File ) ;

      # The extension is the text after the LAST dot of the file name itself.
      # Dots in directory names or earlier in the name (my.data.xml,
      # ..\in\old.xml) must not be mistaken for it.
   my ($ext) = $_File =~ m{\.([^./\\]*)\z} ;
   $ext = "" unless ( defined $ext ) ;
   &Logthis("exit","$_File has [$ext] extention please check.Exiting...") if ( $ext !~ /xml/i ) ;

   &GetXMLEncoding($_File) ; 

   &Logthis("info","     $_File is valid XML file.") ;
   return 0 ; 
}

# Function : Get the encoding format used in XML file.
# Input    : XML file name 
# Output   : Encoding named on the first line of the file, or undef when
#            the first line names none. The encoding is also printed in
#            the debug file.
#            exit - if the file can not be opened.
sub GetXMLEncoding($)
{
   my $_File = $_[0] ;
   my $encoding ; 

   &Logthis("info","     Checking type of encoding used in XML file") ;

      # Only the first line is needed, so the rest of the file is not read.
   open(my $XmlFh,'<',$_File) or &Logthis("exit","Can not open file. OS Error : $!") ;
   my $FirstLine = <$XmlFh> ;
   close $XmlFh ;

      # Capture only the quoted value. A greedy capture would also pick up
      # whatever follows it on the line (other attributes, the closing "?>").
   if ( defined $FirstLine && $FirstLine =~ /encoding\s*=\s*(["'])(.*?)\1/i )
   {
      $encoding = $2 ;
   }

   &Logthis("info","     XML file : [$_File] encoding : [$encoding]") if ( $encoding ) ;
   return $encoding ; 
}

# Function : Call XML::SemanticDiff module with XML files to compare 
# Input    : Two XML files (reference file, new file).
# Output   : 0 - differences are collected in @Changed, @Added and @Deleted
#                and written to the difference sheet.
#            exit 0 - if there is no difference between the files.
#            die    - if the dump file can not be written.
sub CompareXML($$)
{
   my $_old = $_[0] ;
   my $_new = $_[1] ;
   my $_out = "$$.tmp.out" ;
    
   &Logthis("info","Comparing XML files") ;
   &Logthis("info","   Reference file : $_old") ;
   &Logthis("info","   New XML file   : $_new") ;
   
   my $XMLDiff = XML::SemanticDiff->new( keeplinenums => 1,
                                         keepdata => 1,
                                        ) ;

      # Compare only once and reuse the result for the dump and for the
      # report.
   my @Differences = $XMLDiff->compare($_old, $_new) ;

      # Raw dump of the result; it is kept only in -test mode (see below).
   open(my $DumpFh,'>',$_out) or die "can not open [$_out] file for writing. OS error : $!\n" ;
   print $DumpFh Dumper(@Differences) ;
   close $DumpFh ;

   if ( ! @Differences )
   {
      &Logthis("info","     There is no Difference in files. removing diff file.") ;
      unlink $_out ;
      &Logthis("warn","--> There is No difference between XML files. <--") ;
      exit 0 ; 
   }

   unlink $_out unless ( $test ) ;
   &Logthis("info","   Difference found in XML files.") ; 

   foreach my $change ( @Differences )
   {
      my $Message = $change->{'message'} ;
      $Message = "" unless ( defined $Message ) ;

         # The first quoted word of the message names the changed node.
         # A list assignment leaves the name empty when there is none,
         # where reading $1 would reuse the result of an older match.
      my ($_changedElement) = $Message =~ /'(.*?)'/ ;
      $_changedElement = "" unless ( defined $_changedElement ) ;

         # Message                                  => Action
         # Character differences in element         => Changed
         # Attribute 'port' has different value     => Changed
         # Rogue element / Rogue attribute 'val'    => Added
         # Child element 'mailbox' missing          => Deleted
         # Attribute 'value' missing from element   => Deleted
         #
         # NOTE(review): a missing attribute used to match no branch and
         # was dropped from the report. It is reported as "Deleted" here,
         # the counterpart of "Rogue attribute" => "Added" - confirm.
      my ($_action,$Bucket) ;
      if ( $Message =~ /(Character differences|Attribute .* has different value)/i )
      {
         ($_action,$Bucket) = ("Changed",\@Changed) ;
      } elsif ( $Message =~ /(Rogue element|rogue attribute)/i )
      {
         ($_action,$Bucket) = ("Added",\@Added) ;
      } elsif ( $Message =~ /(Child element .* missing|Attribute .* missing from element)/i )
      {
         ($_action,$Bucket) = ("Deleted",\@Deleted) ;
      } else
      {
         &Logthis("info","   Difference not reported : $Message") ;
         next ;
      }

         # Old and new value of the node.
      my %Value ;
      foreach my $Key ( 'old_value', 'new_value' )
      {
         my $Text = $change->{$Key} ;

            # A lone "o" is treated as "no value"; presumably it is the
            # placeholder XML::SemanticDiff uses for empty text - TODO confirm.
         $Text = "" if ( ! defined $Text || $Text eq "o" ) ;

            # Rows are joined with tabs and split again by PrintDiff, so a
            # tab inside a value would shift the columns.
         $Text =~ tr/\t/ / ;
         $Value{$Key} = $Text ;
      }

      push @{$Bucket} , join ("\t",
                              "$change->{'startline'}",
                              $_action,
                              $_changedElement,
                              "$change->{'context'}",
                              $Value{'old_value'},
                              $Value{'new_value'}) ;
   }

      # Print changed rows first, then added rows, then deleted rows.
   foreach my $Rows ( \@Changed, \@Added, \@Deleted )
   {
      next unless ( @{$Rows} ) ;
      &PrintDiff($Rows) ;
      &getColumnLength($Rows) ;
   }
   return 0 ;
}

# Function : Write difference rows to the 'XML difference' worksheet.
# Input    : Reference to an array of tab separated rows
#            (start line, action, element, parent node, old value, new value).
# Output   : One worksheet row per entry with a known action. Always returns 0.
sub PrintDiff($) 
{
   my $Rows = $_[0] ;

      # Cell format used for each known action.
   my %FormatFor = (
                    'Changed' => $General,
                    'Added'   => $AddFormat,
                    'Deleted' => $DelFormat,
                   ) ;

   for my $Row ( @{$Rows} )
   {
         # The row counter moves on even when nothing is written below.
      $xlsCnt++ ;

      my @Cells = split (/\t/,$Row) ;
      $#Cells   = 5 ;   # exactly six columns: pad short rows, drop extras
      @Cells    = map { defined $_ ? $_ : "" } @Cells ;

      my $Action = $Cells[1] ;
      if ( exists $FormatFor{$Action} )
      {
         $ws1->write("A${xlsCnt}",\@Cells,$FormatFor{$Action}) ;
      }

      $DiffCnt++ ;
   }
   return 0 ; 
}

# Function : Fill the 'Information' worksheet, report the output file on
#            screen and finish the run.
# Input    : None.
# Output   : Message on screen. Closes the debug file and the workbook,
#            then exits; never returns.
sub DisplayInfo()
{
      # Heading and fixed labels in column A.
   $ws2->write('A1','Information About XML Comparison',$Heading);
   my @Labels = (
                 [ 'A3', 'Reference XML file'      ],
                 [ 'A4', 'New XML file'            ],
                 [ 'A6', 'Total No of differences' ],
                 [ 'A7', 'Output XML file'         ],
                ) ;
   foreach my $Label ( @Labels )
   {
      $ws2->write($Label->[0],$Label->[1]) ;
   }

      # The output location is shown with backslashes as path separator.
   $cwd =~ tr{/}{\\} ;
   my $OutPath = "${cwd}\\${file}" ;

      # Column B is made as wide as the longest value it holds.
   my $Len = 0 ; 
   foreach my $Text ( $oldfile, $newfile, $OutPath )
   {
      my $Size = length $Text ;
      $Len = $Size if ( $Size > $Len ) ;
   }

      # Values in column B, next to their labels.
   my @Values = (
                 [ 'B3', $oldfile ],
                 [ 'B4', $newfile ],
                 [ 'B6', $DiffCnt ],
                 [ 'B7', $OutPath ],
                ) ;
   foreach my $Value ( @Values )
   {
      $ws2->write($Value->[0],$Value->[1]) ;
   }

   $ws2->set_column('A:A',35) ;
   $ws2->set_column('B:B',$Len) ;

   print "###\n",
         "#   Output file : ${OutPath}\n",
         "###\n" ;

   &Logthis("info","   Total no of differences $DiffCnt") ;
   &Logthis("info","*** End Of Debug file ***")  ;
   close DEBUG ;

      # Column widths of the difference sheet are applied just before close.
   &setColumnLength() ;
   $wb->close ;
   exit ; 
}

# Function : To Print formatted messages.
# Input    : Loglevel ("info", "warn" or "exit") & Message 
# Output   : "info" - message goes to the debug file, only in debug mode.
#            "warn" - message goes to STDERR.
#            "exit" - message goes to STDERR and the script exits with 12.
#            Returns 0 for every level other than "exit".
sub Logthis($$)
{
   my ($Level,$Text) = @_ ;

      # Fatal messages first: nothing after this branch runs for them.
   if ( $Level eq "exit" )
   {
      printf STDERR " Error   : %-72s\n",$Text ;
      exit 12 ;
   }

   if ( $Level eq "warn" )
   {
      printf STDERR " Message : %-72s\n",$Text ;
   } elsif ( $Level eq "info" && $debug )
   {
      printf DEBUG " Info    : %-72s\n",$Text ;
   }
   return 0 ; 
}

# Function : Widen the remembered column widths so the given rows fit.
# Input    : Reference to an array of tab separated rows.
# Output   : $La .. $Lf are raised where a field is longer than the width
#            remembered for its column. Always returns 0.
sub getColumnLength($)
{
   my $Rows   = $_[0] ;
   my $Factor = "1.1" ;   # a widened column is set to 1.1 times the text length

      # Width variables of columns A..F, in field order.
   my @Width = ( \$La, \$Lb, \$Lc, \$Ld, \$Le, \$Lf ) ;

   foreach my $Row ( @{$Rows} )
   {
      my @Field = split(/\t/,$Row) ;
      foreach my $Col ( 0 .. $#Width )
      {
         my $Size = length $Field[$Col] ;
         ${ $Width[$Col] } = $Size * $Factor if ( $Size > ${ $Width[$Col] } ) ;
      }
   }
   return 0 ; 
}

# Function : Apply the collected widths to columns A..F of the difference
#            worksheet.
# Input    : None (uses $La .. $Lf).
# Output   : Always returns 0.
sub setColumnLength()
{
   my @Layout = (
                 [ 'A:A', $La ],
                 [ 'B:B', $Lb ],
                 [ 'C:C', $Lc ],
                 [ 'D:D', $Ld ],
                 [ 'E:E', $Le ],
                 [ 'F:F', $Lf ],
                ) ;

   foreach my $Column ( @Layout )
   {
      $ws1->set_column($Column->[0],$Column->[1]) ;
   }
   return 0 ; 
}
